library(tidyverse)
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
[37m-- [1mAttaching packages[22m ---------------------------------- tidyverse 1.2.1.[31m9000[37m --[39m
[37m[32mv[37m [34mggplot2[37m 3.2.1 [32mv[37m [34mpurrr [37m 0.3.2
[32mv[37m [34mtibble [37m 2.1.3 [32mv[37m [34mdplyr [37m 0.8.3
[32mv[37m [34mtidyr [37m 0.8.3 [32mv[37m [34mstringr[37m 1.4.0
[32mv[37m [34mreadr [37m 1.3.1 [32mv[37m [34mforcats[37m 0.4.0[39m
package 㤼㸱ggplot2㤼㸲 was built under R version 3.6.1[37m-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
[31mx[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
library(tidytext)
package 'tidytext' was built under R version 3.6.1
library(jsonlite)
Attaching package: 'jsonlite'
The following object is masked from 'package:purrr':
flatten
# Read the CORD-19 metadata table.
# `pmcid` and `publish_time` are declared as character explicitly: with type
# guessing readr infers them as logical and double, and values such as
# "PMC1054884" or dates with trailing text like "Mar 1" are then reported as
# parsing failures and replaced by NA. All other columns are still guessed.
# NOTE(review): other columns may also have failures; check problems(metadata).
metadata <- read_csv(
  "./2020-03-13/all_sources_metadata_2020-03-13.csv",
  col_types = cols(
    pmcid = col_character(),
    publish_time = col_character()
  )
)
Parsed with column specification:
cols(
sha = [31mcol_character()[39m,
source_x = [31mcol_character()[39m,
title = [31mcol_character()[39m,
doi = [31mcol_character()[39m,
pmcid = [33mcol_logical()[39m,
pubmed_id = [32mcol_double()[39m,
license = [31mcol_character()[39m,
abstract = [31mcol_character()[39m,
publish_time = [32mcol_double()[39m,
authors = [31mcol_character()[39m,
journal = [31mcol_character()[39m,
`Microsoft Academic Paper ID` = [32mcol_double()[39m,
`WHO #Covidence` = [31mcol_character()[39m,
has_full_text = [33mcol_logical()[39m
)
44397 parsing failures.
row col expected actual file
1237 pmcid 1/0/T/F/TRUE/FALSE PMC1054884 './2020-03-13/all_sources_metadata_2020-03-13.csv'
1237 publish_time no trailing characters Mar 1 './2020-03-13/all_sources_metadata_2020-03-13.csv'
1238 pmcid 1/0/T/F/TRUE/FALSE PMC1065064 './2020-03-13/all_sources_metadata_2020-03-13.csv'
1238 publish_time no trailing characters Oct 14 './2020-03-13/all_sources_metadata_2020-03-13.csv'
1239 pmcid 1/0/T/F/TRUE/FALSE PMC1084330 './2020-03-13/all_sources_metadata_2020-03-13.csv'
.... ............ ...................... .......... ..................................................
See problems(...) for more details.
# Open the metadata table in the interactive data viewer.
View(metadata)

# Number of records per license, most frequent first.
metadata %>%
  count(license, sort = TRUE)

# Number of records per publish_time value, most frequent first.
metadata %>%
  count(publish_time, sort = TRUE)

# Keep only the rows where has_full_text is TRUE.
metadata %>%
  filter(has_full_text)

# Keep only the rows that have a non-missing abstract.
metadata %>%
  filter(!is.na(abstract))
NA
LS0tDQp0aXRsZTogIkNPVklELTE5IE9wZW4gUmVzZWFyY2ggRGF0YXNldCBDaGFsbGVuZ2UgKENPUkQtMTkpIg0Kc3VidGl0bGU6ICJFeHBsb3JhdG9yeSBEYXRhIEFuYWx5c2lzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGlkeXRleHQpDQpsaWJyYXJ5KGpzb25saXRlKQ0KDQoNCm1ldGFkYXRhIDwtIHJlYWRfY3N2KCIuLzIwMjAtMDMtMTMvYWxsX3NvdXJjZXNfbWV0YWRhdGFfMjAyMC0wMy0xMy5jc3YiKQ0KDQpWaWV3KG1ldGFkYXRhKQ0KDQptZXRhZGF0YSAlPiUgDQogIGNvdW50KGxpY2Vuc2UsIHNvcnQgPSBUKQ0KDQptZXRhZGF0YSAlPiUgDQogIGNvdW50KHB1Ymxpc2hfdGltZSwgc29ydCA9IFQpDQoNCm1ldGFkYXRhICU+JSANCiAgZmlsdGVyKGhhc19mdWxsX3RleHQpDQoNCm1ldGFkYXRhICU+JSANCiAgZmlsdGVyKCFpcy5uYShhYnN0cmFjdCkpDQoNCmBgYA0KDQojIEV4dHJhY3RpbmcgdGV4dCBmb20gYWxsIHRlaCBmdWxsIHBhcGVycw0KDQpgYGB7cn0NCg0KanNvbl9vYmplY3RzIDwtIGRpcigiLi8yMDIwLTAzLTEzL2NvbW1fdXNlX3N1YnNldC9jb21tX3VzZV9zdWJzZXQvIiwgZnVsbC5uYW1lcz1UKSAlPiUgDQogIG1hcChyZWFkX2pzb24pDQoNCg0KDQoNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=